setwd("H:\\EB5205BarryWebAnalyticsCA\\nasa_source_code")
The working directory was changed to H:/EB5205BarryWebAnalyticsCA/nasa_source_code inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
library("arules")
Loading required package: Matrix
Attaching package: 'arules'
The following objects are masked from 'package:base':
abbreviate, write
# Load the July sessions as transactions. The file is read in "single" format,
# i.e. one (session_id, webpage) pair per row, and duplicate items within a
# transaction are removed.
# NOTE(review): the recorded run warns "EOF within quoted string"; presumably
# some URLs contain quote characters - confirm whether any rows are lost.
nasa_transactions <- read.transactions(
  file = "nasa_data/sessionized_data/sessionize_time_july.csv",
  format = "single",
  sep = ",",
  cols = c("session_id", "webpage"),
  rm.duplicates = TRUE
)
EOF within quoted string
number of items read is not a multiple of the number of columns
# Mine association rules containing at least two items (lhs + rhs), with the
# minimum support and the minimum confidence both set to 1%.
rules <- apriori(
  nasa_transactions,
  parameter = list(support = 0.01, confidence = 0.01, minlen = 2)
)
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
0.01 0.1 1 none FALSE TRUE 5 0.01 2 10 rules FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 867
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[1614 item(s), 86764 transaction(s)] done [0.05s].
sorting and recoding items ... [50 item(s)] done [0.00s].
creating transaction tree ... done [0.03s].
checking subsets of size 1 2 3 done [0.01s].
writing ... [196 rule(s)] done [0.00s].
creating S4 object ... done [0.01s].
summary(rules)
set of 196 rules
rule length distribution (lhs + rhs):sizes
2 3
136 60
Min. 1st Qu. Median Mean 3rd Qu. Max.
2.000 2.000 2.000 2.306 3.000 3.000
summary of quality measures:
support confidence lift count
Min. :0.01002 Min. :0.04689 Min. : 0.3658 Min. : 869
1st Qu.:0.01134 1st Qu.:0.16473 1st Qu.: 1.5382 1st Qu.: 984
Median :0.01351 Median :0.27129 Median : 2.8714 Median :1172
Mean :0.01725 Mean :0.35568 Mean : 5.5899 Mean :1497
3rd Qu.:0.01858 3rd Qu.:0.47161 3rd Qu.: 6.4975 3rd Qu.:1612
Max. :0.04621 Max. :0.98117 Max. :30.4908 Max. :4009
mining info:
data ntransactions support confidence
nasa_transactions 86764 0.01 0.01
#inspect(rules)
# a useful plot of training data
itemFrequencyPlot(nasa_transactions,topN=20,type="absolute")
Interesting findings: 1. Apollo 13, the movie about the eventful 1970 mission to the Moon, was released at the end of June 1995, which sparked a lot of interest in the Apollo 13 mission. Incidentally, Apollo 13 was launched from the KSC, so many people were presumably searching this site for the history of the mission, since the internet was in its nascent state and Wikipedia (and other open internet-based encyclopedias) had not yet been launched. References: 1. https://en.wikipedia.org/wiki/Apollo_13_(film)
#read the test data
setwd("H:\\EB5205BarryWebAnalyticsCA\\nasa_source_code")
The working directory was changed to H:/EB5205BarryWebAnalyticsCA/nasa_source_code inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
# Read the August sessions (used as test data) and keep only the two columns
# the evaluation needs: the session identifier and the page that was visited.
nasa_aug_file <- read.csv(
  file = "nasa_data/sessionized_data/sessionize_time_aug.csv"
)[c("session_id", "webpage")]
head(nasa_aug_file)
# Fire the rule set for a single item.
#
# item    : the item (page) to look up as a rule antecedent.
# rulesDF : data frame whose first column, `rules`, holds each rule as text of
#           the form "{lhs} => {rhs}".
#
# Returns one comma-separated string (no spaces) with the consequents of every
# rule whose antecedent is exactly "{item}"; "" when no rule fires. Rules with
# more than one antecedent item never match, because the search text requires
# the closing brace directly after the item.
makepreds <- function(item, rulesDF) {
  lhs_text <- paste0("{", item, "} =>")
  fired <- rulesDF[grep(lhs_text, rulesDF$rules, fixed = TRUE), 1]
  # Strip everything up to the opening brace of the rhs, then its closing brace.
  rhs_items <- sub(".*=> \\{", "", fired)
  rhs_items <- sub("\\}", "", rhs_items)
  gsub(" ", "", toString(rhs_items))
}
# Flatten the mined rules into a data frame; makepreds() searches its `rules`
# column, which holds each rule as text.
rulesDF <- as(rules, "data.frame")
# Attach to every test page view the consequents of the rules that page fires.
# makepreds() depends only on the page, so it is evaluated once per distinct
# page and the results are mapped back onto the rows. The previous
# apply(nasa_aug_file, 1, ...) coerced the whole data frame to a character
# matrix and re-scanned the rule set for every single page view.
nasa_aug_file$preds <- local({
  pages <- as.character(nasa_aug_file$webpage)
  distinct_pages <- unique(pages)
  page_preds <- vapply(
    distinct_pages, makepreds, character(1),
    rulesDF = rulesDF, USE.NAMES = FALSE
  )
  page_preds[match(pages, distinct_pages)]
})
head(nasa_aug_file)
# Rank the rules by lift (highest first, rules with NA lift dropped) and show
# the twenty strongest.
top.lift <- sort(rules, by = "lift", decreasing = TRUE, na.last = NA)
inspect(head(top.lift, n = 20))
lhs rhs support confidence lift count
[1] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/sounds/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01081094 0.9811715 30.49082 938
[2] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/movies/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01229773 0.9788991 30.42020 1067
[3] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo-13/images/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01212484 0.9713758 30.18641 1052
[4] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/sounds/} 0.01081094 0.3847416 27.81810 938
[5] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/images/} 0.01212484 0.4315012 27.54877 1052
[6] {/history/apollo/apollo-13/images/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01378452 0.8800589 27.34865 1196
[7] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/images/} 0.01378452 0.4283668 27.34865 1196
[8] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/sounds/} 0.01215942 0.3778653 27.32092 1055
[9] {/history/apollo/apollo-13/sounds/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01215942 0.8791667 27.32092 1055
[10] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/apollo-13.html} => {/history/apollo/apollo-13/movies/} 0.01229773 0.4376538 27.27916 1067
[11] {/history/apollo/apollo-13/movies/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01394588 0.8692529 27.01284 1210
[12] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/movies/} 0.01394588 0.4333811 27.01284 1210
[13] {/history/apollo/apollo-13/} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01169840 0.7917317 24.60380 1015
[14] {/history/apollo/apollo-13/apollo-13-info.html} => {/history/apollo/apollo-13/} 0.01169840 0.3635387 24.60380 1015
[15] {/history/apollo/apollo-13/apollo-13.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13-info.html} 0.01768014 0.4415659 13.72207 1534
[16] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo.html} => {/history/apollo/apollo-13/apollo-13.html} 0.01768014 0.9702720 13.06404 1534
[17] {/history/apollo/apollo-13/apollo-13.html,
/history/history.html} => {/history/apollo/apollo.html} 0.01778387 0.9571960 12.00320 1543
[18] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/sounds/} => {/history/apollo/apollo-13/apollo-13.html} 0.01081094 0.8890995 11.97111 938
[19] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/movies/} => {/history/apollo/apollo-13/apollo-13.html} 0.01229773 0.8818182 11.87307 1067
[20] {/history/apollo/apollo-13/apollo-13-info.html,
/history/apollo/apollo-13/images/} => {/history/apollo/apollo-13/apollo-13.html} 0.01212484 0.8795987 11.84319 1052
# Split a comma-separated basket string into its distinct items.
#
# itemstrg : a single string such as "/a, /b,/a".
#
# All spaces are removed before splitting. Returns a character vector of the
# distinct items in order of first appearance (character(0) for "").
uniqueitems <- function(itemstrg) {
  compact <- gsub(" ", "", itemstrg)
  pieces <- strsplit(compact, ",")
  unique(pieces[[1]])
}
# Unique predictions per test session: concatenate the per-page prediction
# strings of a session, then split them into a vector of distinct items.
# lapply() is used so that `preds` is always a list column; apply() only
# returns a list when the per-session results differ in length and would
# otherwise hand back a vector or matrix.
userpreds <- aggregate(
  preds ~ session_id,
  data = nasa_aug_file, FUN = paste, collapse = ","
)
userpreds$preds <- lapply(userpreds$preds, uniqueitems)
# Unique pages actually visited per test session, built the same way; this is
# the ground truth the predictions are checked against.
baskets <- aggregate(
  webpage ~ session_id,
  data = nasa_aug_file, FUN = paste, collapse = ","
)
baskets$webpage <- lapply(baskets$webpage, uniqueitems)
baskets
# Count how many of a session's predictions appear in the basket of pages that
# session actually visited.
#
# preds  : a length-one list whose first element is the vector of predictions.
# baskID : the session_id whose basket is looked up.
#
# Caution: reads the data frame `baskets` (columns session_id, webpage) from
# the enclosing/global environment rather than taking it as an argument.
checkpreds <- function(preds, baskID) {
  predicted <- preds[[1]]
  visited <- baskets[baskets$session_id == baskID, "webpage"][[1]]
  # `0 +` keeps the result a double, as the original loop counter was.
  0 + sum(predicted %in% visited)
}
# Count the predictions made for one session.
#
# predlist : vector (or list) of predicted items. Empty strings are
#            placeholders, not predictions: makepreds() returns "" when no
#            rule fires, and such entries survive the comma join and split.
#
# Returns the number of non-empty entries. The previous version inspected
# only the first element, so c("", "/a", "/b") was counted as 0 predictions
# and c("/a", "", "/b") as 3.
countpreds <- function(predlist) {
  sum(nzchar(as.character(unlist(predlist))))
}
# Precision of the rule-based predictions over all test sessions.
# Numerator: per session, the number of unique predictions that are found in
# that session's basket, summed over all sessions.
correctpreds <- sum(apply(
  userpreds, 1,
  function(row) checkpreds(row["preds"], row["session_id"])
))
# Denominator: total number of unique predictions made across all sessions.
totalpreds <- sum(apply(
  userpreds, 1,
  function(row) countpreds(row[["preds"]])
))
precision <- 100 * correctpreds / totalpreds
cat("precision=", precision, "corr=", correctpreds, "total=", totalpreds)
precision= 10.41401 corr= 107453 total= 1031812
library(arulesViz)
package 'arulesViz' was built under R version 3.4.4
Loading required package: grid
#plot(rules)
plotly_arules(rules)
'plotly_arules' is deprecated.
Use 'plot' instead.
See help("Deprecated")
plot(rules, method="graph")
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)
plot(rules, method="graph",nodeCol=grey.colors(10),edgeCol=grey(.7),alpha=1)
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)
plot(rules, method="matrix")
Itemsets in Antecedent (LHS)
[1] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/sounds/}"
[2] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/movies/}"
[3] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo-13/images/}"
[4] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/apollo-13.html}"
[5] "{/history/apollo/apollo-13/apollo-13-info.html}"
[6] "{/history/apollo/apollo-13/images/}"
[7] "{/history/apollo/apollo-13/sounds/}"
[8] "{/history/apollo/apollo-13/movies/}"
[9] "{/history/apollo/apollo-13/}"
[10] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo.html}"
[11] "{/history/apollo/apollo-13/apollo-13.html,/history/history.html}"
[12] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/sounds/}"
[13] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/movies/}"
[14] "{/history/apollo/apollo-13/apollo-13-info.html,/history/apollo/apollo-13/images/}"
[15] "{/,/history/apollo/apollo.html}"
[16] "{/history/apollo/apollo.html,/shuttle/missions/missions.html}"
[17] "{/history/apollo/apollo-1/apollo-1.html}"
[18] "{/history/apollo/apollo-13/apollo-13.html,/history/apollo/apollo.html}"
[19] "{/,/history/history.html}"
[20] "{/shuttle/resources/orbiters/atlantis.html}"
[21] "{/history/apollo/apollo-13/apollo-13.html}"
[22] "{/shuttle/resources/orbiters/discovery.html}"
[23] "{/shuttle/missions/sts-78/mission-sts-78.html}"
[24] "{/history/apollo/apollo-11/apollo-11.html}"
[25] "{/shuttle/missions/missions.html,/shuttle/missions/sts-69/mission-sts-69.html}"
[26] "{/history/history.html,/shuttle/missions/missions.html}"
[27] "{/shuttle/missions/sts-73/mission-sts-73.html}"
[28] "{/shuttle/missions/sts-70/mission-sts-70.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[29] "{/shuttle/missions/sts-69/mission-sts-69.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[30] "{/history/apollo/apollo.html}"
[31] "{/shuttle/countdown/,/shuttle/missions/sts-70/images/images.html}"
[32] "{/shuttle/missions/sts-70/images/images.html}"
[33] "{/shuttle/missions/51-l/mission-51-l.html}"
[34] "{/shuttle/missions/sts-70/movies/movies.html}"
[35] "{/facilities/lc39a.html}"
[36] "{/shuttle/missions/missions.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[37] "{/shuttle/countdown/,/shuttle/missions/sts-71/images/images.html}"
[38] "{/,/shuttle/countdown/liftoff.html}"
[39] "{/shuttle/countdown/,/shuttle/missions/missions.html}"
[40] "{/ksc.html,/shuttle/countdown/liftoff.html}"
[41] "{/history/apollo/apollo.html,/history/history.html}"
[42] "{/shuttle/missions/sts-69/mission-sts-69.html}"
[43] "{/shuttle/missions/sts-70/images/images.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[44] "{/shuttle/countdown/liftoff.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[45] "{/history/history.html}"
[46] "{/shuttle/missions/sts-71/images/images.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[47] "{/,/shuttle/missions/sts-70/mission-sts-70.html}"
[48] "{/shuttle/missions/sts-70/mission-sts-70.html}"
[49] "{/shuttle/countdown/,/shuttle/missions/sts-70/mission-sts-70.html}"
[50] "{/ksc.html,/shuttle/missions/sts-70/mission-sts-70.html}"
[51] "{/shuttle/missions/sts-71/mission-sts-71.html}"
[52] "{/shuttle/missions/missions.html}"
[53] "{/,/shuttle/countdown/}"
[54] "{/shuttle/missions/missions.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[55] "{/ksc.html,/shuttle/countdown/}"
[56] "{/shuttle/countdown/,/shuttle/missions/sts-71/mission-sts-71.html}"
[57] "{/ksc.html,/shuttle/missions/sts-71/mission-sts-71.html}"
[58] "{/shuttle/countdown/countdown.html}"
[59] "{/shuttle/countdown/liftoff.html}"
[60] "{/shuttle/countdown/lps/fr.html}"
[61] "{/shuttle/countdown/,/shuttle/countdown/liftoff.html}"
[62] "{/shuttle/missions/sts-71/movies/movies.html}"
[63] "{/shuttle/countdown/}"
[64] "{/ksc.html,/shuttle/missions/missions.html}"
[65] "{/shuttle/missions/sts-71/images/images.html}"
[66] "{/}"
[67] "{/ksc.html}"
Itemsets in Consequent (RHS)
[1] "{/ksc.html}" "{/}"
[3] "{/shuttle/missions/sts-71/images/images.html}" "{/shuttle/missions/sts-71/movies/movies.html}"
[5] "{/shuttle/countdown/lps/fr.html}" "{/shuttle/countdown/countdown.html}"
[7] "{/shuttle/countdown/liftoff.html}" "{/shuttle/countdown/}"
[9] "{/shuttle/missions/missions.html}" "{/shuttle/missions/sts-71/mission-sts-71.html}"
[11] "{/shuttle/missions/sts-70/mission-sts-70.html}" "{/facilities/lc39a.html}"
[13] "{/shuttle/missions/sts-70/movies/movies.html}" "{/shuttle/missions/51-l/mission-51-l.html}"
[15] "{/shuttle/missions/sts-70/images/images.html}" "{/shuttle/missions/sts-69/mission-sts-69.html}"
[17] "{/shuttle/missions/sts-73/mission-sts-73.html}" "{/history/history.html}"
[19] "{/history/apollo/apollo.html}" "{/history/apollo/apollo-11/apollo-11.html}"
[21] "{/shuttle/missions/sts-78/mission-sts-78.html}" "{/shuttle/resources/orbiters/discovery.html}"
[23] "{/shuttle/resources/orbiters/atlantis.html}" "{/history/apollo/apollo-13/apollo-13.html}"
[25] "{/history/apollo/apollo-1/apollo-1.html}" "{/history/apollo/apollo-13/}"
[27] "{/history/apollo/apollo-13/movies/}" "{/history/apollo/apollo-13/images/}"
[29] "{/history/apollo/apollo-13/sounds/}" "{/history/apollo/apollo-13/apollo-13-info.html}"
plot(rules, method="paracoord", control=list(reorder=TRUE))
# Recall: share of the pages actually visited in the test sessions that the
# rules predicted. correctpreds counts distinct pages per session, so the
# denominator has to be on the same basis: the number of distinct pages in
# each session's basket, summed over all sessions. nrow(nasa_aug_file) counted
# raw page views instead, so any page revisited within a session inflated the
# denominator.
total_relevant_instances <- sum(lengths(baskets$webpage))
recall <- correctpreds * 100 / total_relevant_instances
recall
[1] 25.82508
library(arulesViz)
package 'arulesViz' was built under R version 3.4.4
Loading required package: grid
plot(top.lift, method="graph",nodeCol=grey.colors(10),edgeCol=grey(.7),alpha=1)
plot: Too many rules supplied. Only plotting the best 100 rules using 'support' (change control parameter max if needed)